from lxml import etree
import requests
import csv

def get_res(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before requests gives up and
            raises a timeout error. Without a timeout a stalled connection
            would block the caller indefinitely.

    Returns:
        The body decoded to ``str`` by requests (``response.text``). The HTTP
        status code is not checked, so error pages are returned as-is.
    """
    # Send a browser-like User-Agent instead of the requests default.
    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

def _split_book_info(book_info):
    """Split a '/'-separated description into (author, publisher, date, price).

    The first field is taken as the author and the last three fields as
    publisher, date and price. When fewer than three fields are present the
    missing leading ones come back as '' instead of raising IndexError.
    """
    fields = book_info.split('/')
    author = fields[0]
    # Left-pad so the last three positions always exist; a no-op when the
    # description already has three or more fields.
    padded = [''] * (3 - len(fields)) + fields
    publisher, date, price = padded[-3:]
    return author, publisher, date, price


def get_info():
    """Scrape the Douban Top-250 book list pages and write one CSV row per book.

    Ten list pages are requested (``start`` = 0, 25, ..., 225) through
    ``get_res``. For every ``<tr class="item">`` row the title, detail URL,
    author, publisher, date, price, rating and one-line comment are
    extracted and written as a tuple in that order.

    Rows are written through the module-level ``writer`` object, which this
    function does not create; it must already be bound (the script's
    ``__main__`` section does this) before the function is called.

    Raises:
        IndexError: If a row lacks the title, link, description or rating
            node that the XPath expressions expect.
    """
    format_url = 'https://book.douban.com/top250?start={}'
    for start in range(0, 250, 25):
        selector = etree.HTML(get_res(format_url.format(start)))
        for info in selector.xpath('//tr[@class="item"]'):
            name = info.xpath('td[2]/div[1]/a/@title')[0]
            url_info = info.xpath('td[2]/div[1]/a/@href')[0]

            # The description is a single '/'-separated string; split it once.
            book_info = info.xpath('td[2]/p[1]/text()')[0]
            author, publisher, date, price = _split_book_info(book_info)

            rate = info.xpath('td[2]/div[2]/span[2]/text()')[0]

            # Not every row has a comment; fall back to the placeholder.
            comments = info.xpath('td[2]/p[2]/span/text()')
            comment = comments[0] if comments else '空'

            writer.writerow((name, url_info, author, publisher, date, price,
                             rate, comment))


if __name__ == '__main__':
    # Script entry point: create the CSV, write the header row, then scrape.
    # The ``with`` block closes the file even if get_info() raises part-way.
    # newline='' is what the csv module expects for files it writes, and the
    # utf_8_sig codec prepends a BOM (presumably so spreadsheet tools detect
    # the encoding of the Chinese text).
    with open('file/doubanbook.csv', 'wt', newline='', encoding='utf_8_sig') as fp:
        # ``writer`` must be a module-level name: get_info() reads it as a
        # global rather than receiving it as an argument.
        writer = csv.writer(fp)
        writer.writerow(('name', 'url', 'author', 'publisher', 'date', 'price',
                         'rate', 'comment'))
        get_info()